From 64ffe412a0c46e51d30709a7ae223c3cf32ebb56 Mon Sep 17 00:00:00 2001
From: "kaf24@firebug.cl.cam.ac.uk" <kaf24@firebug.cl.cam.ac.uk>
Date: Thu, 21 Apr 2005 09:14:12 +0000
Subject: [PATCH] bitkeeper revision 1.1350 (42676ee4BkgqwvPiIyB44k55uY8cSA)

Fix blkdev suspend/resume.

Replace the rec_ring[] of pending requests with a blk_shadow[] array
that keeps a pickled copy of each in-flight request together with the
page frame of every data segment. On resume, blkdev_resume() marks all
shadowed grant references invalid so that blkif_recover() can re-grant
foreign access before requeueing the pending requests.

Signed-off-by: Keir Fraser
---
 .../arch/xen/kernel/gnttab.c                  |  20 +-
 .../drivers/xen/blkfront/blkfront.c           | 249 ++++++++++--------
 2 files changed, 146 insertions(+), 123 deletions(-)

diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
index 36b4951b15..28b2ba6fc7 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
@@ -330,34 +330,36 @@ int gnttab_resume(void)
     setup.nr_frames  = NR_GRANT_FRAMES;
     setup.frame_list = frames;
 
-    if ( HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 )
-        BUG();
-    if ( setup.status != 0 )
-        BUG();
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0);
+    BUG_ON(setup.status != 0);
 
     for ( i = 0; i < NR_GRANT_FRAMES; i++ )
         set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
 
-    shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
-
-    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
-        gnttab_free_list[i] = i + 1;
-
     return 0;
 }
 
 int gnttab_suspend(void)
 {
     int i;
+
     for ( i = 0; i < NR_GRANT_FRAMES; i++ )
         clear_fixmap(FIX_GNTTAB_END - i);
+
     return 0;
 }
 
 static int __init gnttab_init(void)
 {
+    int i;
+
     BUG_ON(gnttab_resume());
 
+    shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
+
+    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+        gnttab_free_list[i] = i + 1;
+
     /*
      * /proc/xen/grant : used by libxc to access grant tables
      */
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
index abfa6b5e18..311b8398ed 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -94,37 +94,38 @@ static domid_t rdomid = 0;
 static grant_ref_t gref_head, gref_terminal;
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
     (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
+#define GRANTREF_INVALID (1<<15)
 #endif
 
-unsigned long rec_ring_free;
-blkif_request_t rec_ring[BLK_RING_SIZE];
+static struct blk_shadow {
+    blkif_request_t req;
+    unsigned long request;
+    unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} blk_shadow[BLK_RING_SIZE];
+unsigned long blk_shadow_free;
 
-static int recovery = 0; /* "Recovery in progress" flag.  Protected
-                          * by the blkif_io_lock */
+static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
 
 static void kick_pending_request_queues(void);
 
 int __init xlblk_init(void);
 
-void blkif_completion( blkif_request_t *req );
+static void blkif_completion(struct blk_shadow *s);
 
-static inline int GET_ID_FROM_FREELIST( void )
+static inline int GET_ID_FROM_FREELIST(void)
 {
-    unsigned long free = rec_ring_free;
-
+    unsigned long free = blk_shadow_free;
     BUG_ON(free > BLK_RING_SIZE);
-
-    rec_ring_free = rec_ring[free].id;
-
-    rec_ring[free].id = 0x0fffffee; /* debug */
-
+    blk_shadow_free = blk_shadow[free].req.id;
+    blk_shadow[free].req.id = 0x0fffffee; /* debug */
    return free;
 }
 
-static inline void ADD_ID_TO_FREELIST( unsigned long id )
+static inline void ADD_ID_TO_FREELIST(unsigned long id)
 {
-    rec_ring[id].id = rec_ring_free;
-    rec_ring_free = id;
+    blk_shadow[id].req.id = blk_shadow_free;
+    blk_shadow[id].request = 0;
+    blk_shadow_free = id;
 }
@@ -138,41 +139,31 @@ static int sg_operation = -1;
 #define DISABLE_SCATTERGATHER() (sg_operation = -1)
 #endif
 
-static inline void translate_req_to_pfn(blkif_request_t *xreq,
-                                        blkif_request_t *req)
+static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
 {
+#ifndef CONFIG_XEN_BLKDEV_GRANT
     int i;
+#endif
 
-    xreq->operation = req->operation;
-    xreq->nr_segments = req->nr_segments;
-    xreq->device = req->device;
-    /* preserve id */
-    xreq->sector_number = req->sector_number;
+    s->req = *r;
 
-    for ( i = 0; i < req->nr_segments; i++ )
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-        xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
-        xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+    for ( i = 0; i < r->nr_segments; i++ )
+        s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]);
 #endif
 }
 
-static inline void translate_req_to_mfn(blkif_request_t *xreq,
-                                        blkif_request_t *req)
+static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
 {
+#ifndef CONFIG_XEN_BLKDEV_GRANT
     int i;
+#endif
 
-    xreq->operation = req->operation;
-    xreq->nr_segments = req->nr_segments;
-    xreq->device = req->device;
-    xreq->id = req->id; /* copy id (unlike above) */
-    xreq->sector_number = req->sector_number;
+    *r = s->req;
 
-    for ( i = 0; i < req->nr_segments; i++ )
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-        xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
-        xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+    for ( i = 0; i < s->req.nr_segments; i++ )
+        r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]);
 #endif
 }
@@ -185,8 +176,6 @@ static inline void flush_requests(void)
 }
 
 
-
-
 /************************** KERNEL VERSION 2.6 **************************/
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -208,7 +197,6 @@ static void vbd_update(void)
 
 static void kick_pending_request_queues(void)
 {
-
     if ( (xlbd_blk_queue != NULL) &&
          test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
     {
@@ -218,7 +206,6 @@ static void kick_pending_request_queues(void)
          */
         xlbd_blk_queue->request_fn(xlbd_blk_queue);
     }
-
 }
 
 
@@ -243,9 +230,8 @@ int blkif_release(struct inode *inode, struct file *filep)
      * When usage drops to zero it may allow more VBD updates to occur.
      * Update of usage count is protected by a per-device semaphore.
      */
-    if (--di->mi->usage == 0) {
+    if ( --di->mi->usage == 0 )
         vbd_update();
-    }
 
     return 0;
 }
@@ -259,8 +245,8 @@ int blkif_ioctl(struct inode *inode, struct file *filep,
     DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                   command, (long)argument, inode->i_rdev);
 
-    switch (command) {
-
+    switch ( command )
+    {
     case HDIO_GETGEO:
         /* return ENOSYS to use defaults */
         return -ENOSYS;
@@ -312,7 +298,7 @@ static int blkif_queue_request(struct request *req)
     /* Fill out a communications ring structure. */
     ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
     id = GET_ID_FROM_FREELIST();
-    rec_ring[id].id = (unsigned long) req;
+    blk_shadow[id].request = (unsigned long)req;
 
     ring_req->id = id;
     ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
@@ -341,8 +327,12 @@ static int blkif_queue_request(struct request *req)
                 buffer_ma >> PAGE_SHIFT,
                 rq_data_dir(req) );
 
+            blk_shadow[id].frame[ring_req->nr_segments] =
+                buffer_ma >> PAGE_SHIFT;
+
             ring_req->frame_and_sects[ring_req->nr_segments++] =
                 (((u32) ref) << 16) | (fsect << 3) | lsect;
+
 #else
             ring_req->frame_and_sects[ring_req->nr_segments++] =
                 buffer_ma | (fsect << 3) | lsect;
@@ -353,7 +343,7 @@ static int blkif_queue_request(struct request *req)
     blk_ring.req_prod_pvt++;
 
     /* Keep a private copy so we can reissue requests when recovering. */
-    translate_req_to_pfn(&rec_ring[id], ring_req);
+    pickle_request(&blk_shadow[id], ring_req);
 
     return 0;
 }
@@ -372,8 +362,10 @@ void do_blkif_request(request_queue_t *rq)
 
     queued = 0;
 
-    while ((req = elv_next_request(rq)) != NULL) {
-        if (!blk_fs_request(req)) {
+    while ( (req = elv_next_request(rq)) != NULL )
+    {
+        if ( !blk_fs_request(req) )
+        {
             end_request(req, 0);
             continue;
         }
@@ -383,19 +375,23 @@ void do_blkif_request(request_queue_t *rq)
             blk_stop_queue(rq);
             break;
         }
+
         DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
                 req, req->cmd, req->sector, req->current_nr_sectors,
                 req->nr_sectors, req->buffer,
                 rq_data_dir(req) ? "write" : "read");
+
         blkdev_dequeue_request(req);
-        if (blkif_queue_request(req)) {
+        if ( blkif_queue_request(req) )
+        {
             blk_stop_queue(rq);
             break;
         }
+
         queued++;
     }
 
-    if (queued != 0)
+    if ( queued != 0 )
         flush_requests();
 }
@@ -424,11 +420,12 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
         unsigned long id;
 
         bret = RING_GET_RESPONSE(&blk_ring, i);
-        id = bret->id;
-        req = (struct request *)rec_ring[id].id;
-        blkif_completion( &rec_ring[id] );
+        id  = bret->id;
+        req = (struct request *)blk_shadow[id].request;
 
-        ADD_ID_TO_FREELIST(id); /* overwrites req */
+        blkif_completion(&blk_shadow[id]);
+
+        ADD_ID_TO_FREELIST(id);
 
         switch ( bret->operation )
         {
@@ -437,7 +434,7 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
             if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
                 DPRINTK("Bad return from blkdev data request: %x\n",
                         bret->status);
-
+
             if ( unlikely(end_that_request_first
                           (req,
                            (bret->status == BLKIF_RSP_OKAY),
@@ -813,10 +810,9 @@ static int blkif_queue_request(unsigned long id,
                 blk_ring.req_prod_pvt - 1);
             bh = (struct buffer_head *)id;
-            bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
-
-            rec_ring[req->id].id = id;
-
+            bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
+            blk_shadow[req->id].request = (unsigned long)id;
+
 #ifdef CONFIG_XEN_BLKDEV_GRANT
             /* install a grant reference. */
             ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
 
@@ -828,6 +824,9 @@ static int blkif_queue_request(unsigned long id,
                 buffer_ma >> PAGE_SHIFT,
                 ( operation == BLKIF_OP_WRITE ?
                   1 : 0 ) );
+            blk_shadow[id].frame[req->nr_segments] =
+                buffer_ma >> PAGE_SHIFT;
+
             req->frame_and_sects[req->nr_segments] =
                 (((u32) ref ) << 16) | (fsect << 3) | lsect;
 #else
@@ -840,7 +839,7 @@ static int blkif_queue_request(unsigned long id,
             DISABLE_SCATTERGATHER();
 
             /* Update the copy of the request in the recovery ring. */
-            translate_req_to_pfn(&rec_ring[req->id], req );
+            pickle_request(&blk_shadow[req->id], req);
 
             return 0;
         }
@@ -864,7 +863,7 @@ static int blkif_queue_request(unsigned long id,
 
     req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
     xid = GET_ID_FROM_FREELIST();
-    rec_ring[xid].id = id;
+    blk_shadow[xid].request = (unsigned long)id;
 
     req->id = xid;
     req->operation = operation;
@@ -882,13 +881,15 @@ static int blkif_queue_request(unsigned long id,
                                       buffer_ma >> PAGE_SHIFT,
                                       ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
 
+    blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
+
     req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect;
 #else
     req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
 #endif
 
     /* Keep a private copy so we can reissue requests when recovering. */
-    translate_req_to_pfn(&rec_ring[xid], req );
+    pickle_request(&blk_shadow[xid], req);
 
     blk_ring.req_prod_pvt++;
 
@@ -1002,9 +1003,9 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 
         bret = RING_GET_RESPONSE(&blk_ring, i);
         id = bret->id;
-        bh = (struct buffer_head *)rec_ring[id].id;
+        bh = (struct buffer_head *)blk_shadow[id].request;
 
-        blkif_completion( &rec_ring[id] );
+        blkif_completion(&blk_shadow[id]);
 
         ADD_ID_TO_FREELIST(id);
 
@@ -1083,9 +1084,9 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
 
     id = GET_ID_FROM_FREELIST();
     req_d->id = id;
-    rec_ring[id].id = (unsigned long) req;
+    blk_shadow[id].request = (unsigned long)req;
 
-    translate_req_to_pfn( &rec_ring[id], req );
+    pickle_request(&blk_shadow[id], req);
 
     blk_ring.req_prod_pvt++;
     flush_requests();
@@ -1184,50 +1185,69 @@ static void blkif_recover(void)
 {
     int i;
     blkif_request_t *req;
+    struct blk_shadow *copy;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    int j;
+#endif
+
+    /* Stage 1: Make a safe copy of the shadow state. */
+    copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
+    BUG_ON(copy == NULL);
+    memcpy(copy, blk_shadow, sizeof(blk_shadow));
 
-    /* Hmm, requests might be re-ordered when we re-issue them.
-     * This will need to be fixed once we have barriers */
+    /* Stage 2: Set up free list. */
+    memset(&blk_shadow, 0, sizeof(blk_shadow));
+    for ( i = 0; i < BLK_RING_SIZE; i++ )
+        blk_shadow[i].req.id = i+1;
+    blk_shadow_free = blk_ring.req_prod_pvt;
+    blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
-    /* Stage 1 : Find active and move to safety. */
+    /* Stage 3: Find pending requests and requeue them. */
     for ( i = 0; i < BLK_RING_SIZE; i++ )
     {
-        if ( rec_ring[i].id >= PAGE_OFFSET )
+        /* Not in use? */
+        if ( copy[i].request == 0 )
+            continue;
+
+        /* Grab a request slot and unpickle shadow state into it. */
+        req = RING_GET_REQUEST(
+            &blk_ring, blk_ring.req_prod_pvt);
+        unpickle_request(req, &copy[i]);
+
+        /* We get a new request id, and must reset the shadow state. */
+        req->id = GET_ID_FROM_FREELIST();
+        memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i]));
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+        /* Rewrite any grant references invalidated by suspend/resume. */
+        for ( j = 0; j < req->nr_segments; j++ )
         {
-            req = RING_GET_REQUEST(&blk_ring,
-                                   blk_ring.req_prod_pvt);
-            translate_req_to_mfn(req, &rec_ring[i]);
-            blk_ring.req_prod_pvt++;
+            if ( req->frame_and_sects[j] & GRANTREF_INVALID )
+                gnttab_grant_foreign_access_ref(
+                    blkif_gref_from_fas(req->frame_and_sects[j]),
+                    rdomid,
+                    blk_shadow[req->id].frame[j],
+                    rq_data_dir((struct request *)
+                                blk_shadow[req->id].request));
+            req->frame_and_sects[j] &= ~GRANTREF_INVALID;
         }
-    }
+        blk_shadow[req->id].req = *req;
+#endif
 
-    /* Stage 2 : Set up shadow list. */
-    for ( i = 0; i < blk_ring.req_prod_pvt; i++ )
-    {
-        req = RING_GET_REQUEST(&blk_ring, i);
-        rec_ring[i].id = req->id;
-        req->id = i;
-        translate_req_to_pfn(&rec_ring[i], req);
+        blk_ring.req_prod_pvt++;
     }
 
-    /* Stage 3 : Set up free list. */
-    for ( ; i < BLK_RING_SIZE; i++ )
-        rec_ring[i].id = i+1;
-    rec_ring_free = blk_ring.req_prod_pvt;
-    rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
-
-    /* blk_ring->req_prod will be set when we flush_requests().*/
-    wmb();
+    kfree(copy);
 
-    /* Switch off recovery mode, using a memory barrier to ensure that
-     * it's seen before we flush requests - we don't want to miss any
-     * interrupts. */
     recovery = 0;
+
+    /* blk_ring->req_prod will be set when we flush_requests().*/
     wmb();
 
     /* Kicks things back into life. */
     flush_requests();
 
-    /* Now safe to left other peope use interface. */
+    /* Now safe to let other people use the interface. */
     blkif_state = BLKIF_STATE_CONNECTED;
 }
 
@@ -1409,10 +1429,11 @@ int __init xlblk_init(void)
 
     printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");
 
-    rec_ring_free = 0;
+    blk_shadow_free = 0;
+    memset(blk_shadow, 0, sizeof(blk_shadow));
     for ( i = 0; i < BLK_RING_SIZE; i++ )
-        rec_ring[i].id = i+1;
-    rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
+        blk_shadow[i].req.id = i+1;
+    blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
     (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
                                     CALLBACK_IN_BLOCKING_CONTEXT);
@@ -1428,32 +1449,32 @@ void blkdev_suspend(void)
 }
 
 void blkdev_resume(void)
 {
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    int i, j;
+    for ( i = 0; i < BLK_RING_SIZE; i++ )
+        for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
+            blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
+#endif
     send_driver_status(1);
 }
 
-void blkif_completion(blkif_request_t *req)
+static void blkif_completion(struct blk_shadow *s)
 {
     int i;
 #ifdef CONFIG_XEN_BLKDEV_GRANT
-    grant_ref_t gref;
-
-    for ( i = 0; i < req->nr_segments; i++ )
-    {
-        gref = blkif_gref_from_fas(req->frame_and_sects[i]);
-        gnttab_release_grant_reference(&gref_head, gref);
-    }
+    for ( i = 0; i < s->req.nr_segments; i++ )
+        gnttab_release_grant_reference(
+            &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
 #else
     /* This is a hack to get the dirty logging bits set */
-    switch ( req->operation )
+    if ( s->req.operation == BLKIF_OP_READ )
     {
-    case BLKIF_OP_READ:
-        for ( i = 0; i < req->nr_segments; i++ )
+        for ( i = 0; i < s->req.nr_segments; i++ )
         {
-            unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
+            unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT;
             unsigned long mfn = phys_to_machine_mapping[pfn];
             xen_machphys_update(mfn, pfn);
         }
-        break;
     }
 #endif
 }
-- 
2.30.2
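
The core idiom in this patch is the free list threaded through
blk_shadow[].req.id plus the copy-and-requeue loop in blkif_recover().
The following is a simplified, self-contained user-space sketch of that
idiom only, not the driver code: RING_SIZE, struct shadow, get_id(),
add_id() and the printf are stand-ins for BLK_RING_SIZE, struct
blk_shadow, GET_ID_FROM_FREELIST(), ADD_ID_TO_FREELIST() and the real
ring requeue, and the grant-reference rewriting is deliberately omitted.

/*
 * Sketch of the blk_shadow free-list and recovery idiom.  User-space
 * stand-in, not kernel code; names mirror the patch for readability.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define RING_SIZE 8

struct shadow {
    unsigned long id;      /* free slot: index of next free slot */
    unsigned long request; /* nonzero while a request is in flight */
};

static struct shadow shadow[RING_SIZE];
static unsigned long shadow_free;

/* Thread the free list through the id fields, as xlblk_init() does. */
static void freelist_init(void)
{
    int i;
    memset(shadow, 0, sizeof(shadow));
    for (i = 0; i < RING_SIZE; i++)
        shadow[i].id = i + 1;
    shadow[RING_SIZE - 1].id = 0x0fffffff;    /* sentinel: list exhausted */
    shadow_free = 0;
}

static unsigned long get_id(void)             /* cf. GET_ID_FROM_FREELIST */
{
    unsigned long free = shadow_free;
    assert(free < RING_SIZE);                 /* sentinel would trip this */
    shadow_free = shadow[free].id;            /* unlink head of free list */
    shadow[free].id = 0x0fffffee;             /* debug poison, as in the patch */
    return free;
}

static void add_id(unsigned long id)          /* cf. ADD_ID_TO_FREELIST */
{
    shadow[id].id = shadow_free;              /* link slot back onto the list */
    shadow[id].request = 0;                   /* mark slot not-in-flight */
    shadow_free = id;
}

/*
 * Recovery, as in blkif_recover(): copy the shadow state aside, reset
 * the free list, then requeue every slot that was still in flight.
 */
static void recover(void)
{
    struct shadow copy[RING_SIZE];
    int i;

    memcpy(copy, shadow, sizeof(copy));
    freelist_init();

    for (i = 0; i < RING_SIZE; i++) {
        unsigned long id;
        if (copy[i].request == 0)
            continue;                         /* slot was free: nothing to replay */
        id = get_id();                        /* requeued requests get fresh ids */
        shadow[id] = copy[i];
        printf("requeued request %#lx as id %lu\n", copy[i].request, id);
    }
}

int main(void)
{
    unsigned long a, b;

    freelist_init();
    a = get_id();
    b = get_id();
    shadow[a].request = 0x1001;               /* two requests in flight... */
    shadow[b].request = 0x1002;
    add_id(a);                                /* ...one completes before suspend */
    recover();                                /* only 0x1002 is requeued */
    return 0;
}

The same structure explains why the patched ADD_ID_TO_FREELIST also
clears .request: a zero request field is exactly what blkif_recover()
uses to tell free slots from pending ones.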